"""
@FileName：世事杂谈.py
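@Description：Scrapes article titles, links and thumbnail URLs from pages 1-10 of the dili360.com BBS column 6542 and writes them to an Excel workbook.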

@Author：HeYiQing
@Time：2023/11/15 19:46
"""
import os
from urllib.parse import urljoin

import openpyxl
import requests
from lxml import etree
# title url
# Site root; article links found in the listing are resolved against it.
BASE_URL = 'http://www.dili360.com'
# Listing pages of the column are addressed as <n>.htm.
PAGE_URL_TEMPLATE = BASE_URL + '/bbs/column/6542/{page}.htm'
# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
}
# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10
# Destination workbook, relative to the current working directory.
OUTPUT_PATH = '../../repo/dili360/excel/世事杂谈.xlsx'


def _first(nodes, default=None):
    """Return the first XPath result in *nodes*, or *default* if it is empty."""
    return nodes[0] if nodes else default


def _fetch_articles(first_page=1, last_page=10):
    """Collect article data from the column's listing pages.

    Pages ``first_page`` .. ``last_page`` (inclusive) are downloaded one by
    one. A page that cannot be fetched is reported and skipped so that the
    data gathered from the other pages is still written out.

    Returns:
        A ``(title_to_url, title_to_avatar)`` pair of dicts, both keyed by
        article title. Because the title is the key, an article whose title
        repeats an earlier one overwrites that earlier entry.
    """
    title_to_url = {}
    title_to_avatar = {}

    for page in range(first_page, last_page + 1):
        url = PAGE_URL_TEMPLATE.format(page=page)
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(url + '-----请求失败: ' + str(exc))
            continue

        tree = etree.HTML(response.text)
        # NOTE(review): guards against a body with no parsable markup, for
        # which etree.HTML may hand back None instead of an element.
        if tree is None:
            continue

        # Each <li> under the right-hand column is one article entry.
        for li in tree.xpath('//div[@class="right"]/ul/li'):
            title = _first(li.xpath('./div[@class="detail"]/h3/a/text()'))
            href = _first(li.xpath('./div[@class="detail"]/h3/a/@href'))
            if title is None or href is None:
                # Not an article entry (or the markup changed); nothing to record.
                continue
            # An entry without a thumbnail is still recorded, with an empty avatar.
            avatar = _first(li.xpath('./div[@class="thumb"]//img/@src'), '')

            # urljoin copes with both site-relative and absolute hrefs.
            title_to_url[title] = urljoin(BASE_URL, href)
            title_to_avatar[title] = avatar
            print(title + '-----加载完成')

    return title_to_url, title_to_avatar


def _write_sheet(sheet, header, rows):
    """Write *header* into row 1 of *sheet* and each pair of *rows* below it."""
    sheet['A1'], sheet['B1'] = header
    for row_index, (first, second) in enumerate(rows, start=2):
        sheet.cell(row_index, 1).value = first
        sheet.cell(row_index, 2).value = second


def main():
    """Scrape the column and save the result as a two-sheet Excel workbook.

    Sheet ``title_url`` maps article titles to article URLs; sheet
    ``title_avatar`` maps the same titles to thumbnail image URLs.
    """
    title_to_url, title_to_avatar = _fetch_articles()

    workbook = openpyxl.Workbook()

    # A new workbook already contains one sheet; reuse it for the URLs.
    url_sheet = workbook.active
    url_sheet.title = 'title_url'
    _write_sheet(url_sheet, ('title', 'url'), title_to_url.items())

    avatar_sheet = workbook.create_sheet(title="title_avatar")
    _write_sheet(avatar_sheet, ('title', 'avatar'), title_to_avatar.items())

    # Saving fails if the target directory is missing, so create it first.
    output_dir = os.path.dirname(OUTPUT_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Both sheets go out in a single save.
    workbook.save(OUTPUT_PATH)
    print('~~~~~title-url---全部下载完成~~~~~')
    print('~~~~~title-avatar---全部下载完成~~~~~')


if __name__ == '__main__':
    main()